We need a bunch of these…
First jsonlite for processing the colours data pulled from the website
Next, a bunch of data munging packages from the ‘tidyverse’
And finally the R (vector) spatial packages
See the Dulux website for what this is all about
Prior to building this notebook I had a poke around on the website to figure out where the colour details were to be found.
The loop on the next slide
df_colours <- NULL
for (group in colour_groups) {
source <- str_c(base_url, group) # the URL of the colour group JSON
json <- fromJSON(source, flatten = TRUE)
write_json(json, str_c(group, ".json"))
# the next line needed a bit of experimentation...
the_colours <- rbind.fill(json$categoryColours$masterColour.colours)
if (is.null(df_colours)) {
df_colours <- the_colours
} else {
df_colours <- bind_rows(df_colours, the_colours)
}
Sys.sleep(0.5)
}
write.csv(df_colours, "dulux-colours-raw.csv", row.names = FALSE)## id red green blue lrv baseId name woodType coats
## 1 149253 205 210 206 67 vivid_white Pukaki Quarter None NA
## 2 149254 220 240 242 86 vivid_white Canoe Bay None NA
## 3 149255 226 240 245 87 vivid_white Mt Dobson None NA
## 4 149256 220 230 235 80 vivid_white Raetihi None NA
## 5 149257 217 219 223 74 vivid_white Taiaroa Head None NA
## 6 149258 180 200 219 60 vivid_white Gulf Harbour None NA
There are paint names with modifiers as suffixes to specify different shades of particular colours, and we need to handle this
The modifiers are
This is a nice way to clean this up (there are others…)
What’s nice is it demonstrates pipelines using %>%
df_colours_tidied <- df_colours %>%
## remove some columns we won't be needing
select(-id, -baseId, -woodType, -coats) %>%
## separate the name components, filling from the left with NAs if <5
separate(name, into = c("p1", "p2", "p3", "p4", "p5"), sep = " ",
remove = FALSE, fill = "left") %>%
## replace any NAs with an empty string
mutate(p1 = str_replace_na(p1, ""),
p2 = str_replace_na(p2, ""),
p3 = str_replace_na(p3, ""),
p4 = str_replace_na(p4, "")) %>%
## if p5 is a paint modifiers, then recompose name
## from p1:p4 else from p1:p5
## similarly keep modifier where it exists
mutate(placename = if_else(p5 %in% paint_modifiers,
str_trim(str_c(p1, p2, p3, p4, sep = " ")),
str_trim(str_c(p1, p2, p3, p4, p5, sep = " "))),
modifier = if_else(p5 %in% paint_modifiers,
p5, "")) %>%
## remove some places that are tricky to deal with later
filter(!placename %in% c("Chatham Islands",
"Passage Rock",
"Auckland Islands",
"Cossack Rock")) %>%
## throw away variables we no longer and reorder
select(name, placename, modifier, red, green, blue)
# save it so we have it for later
write.csv(df_colours_tidied, "dulux-colours.csv", row.names = FALSE)Add x and y columns to our data for the coordinates—note that we reload from the saved file so as not to keep hitting the Dulux website
Code on the next slide:
for (placename in unique(df_colours_tidied_xy$placename)) {
address <- str_c(placename, "New Zealand", sep = ", ")
geocode <- geocode_OSM(address, as.data.frame = TRUE, return.first.only = FALSE)
num_geocodes <- nrow(geocode)
matching_rows <- which(df_colours_tidied_xy$placename == placename)
for (i in 1:length(matching_rows)) {
if (!is.null(geocode)) {
if (num_geocodes >= i) {
df_colours_tidied_xy[matching_rows[i], ]$x <- geocode$lon[i]
df_colours_tidied_xy[matching_rows[i], ]$y <- geocode$lat[i]
}
}
}
Sys.sleep(0.5) # so as not to over-tax the geocoder
}Another tidy up removing anything that didn’t get geocoded
Get the geocoded dataset we made earlier
sf point datasetdulux_colours_sf <- st_as_sf(dulux_colours,
coords = c("x", "y"), # the columns with the coordinates
crs = 4326) %>% # the project EPSG:4326 for lng-lat
st_transform(2193) %>% # convert to NZTM
## and make an RGB column
mutate(rgb = rgb(red / 255,
green / 255,
blue/ 255))
st_write(dulux_colours_sf, "dulux-colours-pts.gpkg", delete_dsn = TRUE)## Deleting source `dulux-colours-pts.gpkg' using driver `GPKG'
## Writing layer `dulux-colours-pts' to data source
## `dulux-colours-pts.gpkg' using driver `GPKG'
## Writing 904 features with 7 fields and geometry type Point.
## tmap mode set to interactive viewing
## Reading layer `nz' from data source
## `/home/osullid3/Documents/code/dulux-colours-map/nz.gpkg' using driver `GPKG'
## Simple feature collection with 1 feature and 0 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1089972 ymin: 4748123 xmax: 2091863 ymax: 6223160
## Projected CRS: NZGD2000 / New Zealand Transverse Mercator 2000
Points aren’t really much fun
Instead, make up Voronois and clip to NZ
## Reading layer `dulux-colours-vor' from data source
## `/home/osullid3/Documents/code/dulux-colours-map/dulux-colours-vor.gpkg'
## using driver `GPKG'
## Simple feature collection with 902 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1089972 ymin: 4748123 xmax: 2091863 ymax: 6223160
## Projected CRS: NZGD2000 / New Zealand Transverse Mercator 2000